import requests

import re

class Spider:
    """Download one URL at construction time and extract data from it.

    The response is fetched once, inside ``__init__``, and stored on the
    ``html`` attribute: the decoded response text by default, or the parsed
    JSON payload when ``json`` is not ``None``.
    """

    def __init__(self, url, charset='gbk', json=None, timeout=10):
        """Store the settings and fetch ``url`` immediately.

        Args:
            url: Address passed to ``requests.get``.
            charset: Encoding assigned to the response before its body is
                read.
            json: Any value other than ``None`` makes the spider store the
                parsed JSON payload instead of the response text.
            timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
                stalled server cannot block the constructor forever. Pass
                ``None`` to wait indefinitely.
        """
        self.url = url
        self.json = json
        self.charset = charset
        self.timeout = timeout
        # From here on the instance attribute ``html`` hides the ``html``
        # method below; use ``fetch()`` to download the page again.
        self.html = self.html()

    def fetch(self):
        """Request ``self.url`` and return its content.

        Returns:
            The parsed JSON payload when ``self.json`` is not ``None``,
            otherwise the response text decoded with ``self.charset``.
        """
        res = requests.get(self.url, timeout=self.timeout)
        res.encoding = self.charset

        if self.json is not None:
            return res.json()

        return res.text

    def html(self):
        """Return the content for ``__init__`` to store; delegates to ``fetch``.

        Once ``__init__`` has run, the ``html`` attribute assigned there
        hides this method on the instance, so it is only callable during
        construction. Call ``fetch()`` to download again afterwards.
        """
        return self.fetch()

    def info(self, **regex):
        """Run each named pattern over the stored content.

        Args:
            **regex: Result name mapped to a regular-expression pattern.

        Returns:
            A dict mapping every given name to the list produced by
            ``re.findall(pattern, self.html)``.
        """
        return {
            name: re.findall(pattern, self.html)
            for name, pattern in regex.items()
        }
            

if __name__ == '__main__':
    # Demo run. Spider needs a URL and downloads it inside the constructor;
    # the result is then available on the ``html`` attribute (an attribute,
    # not a callable).
    s = Spider('https://www.x23us.com/html/28/28373/')
    html = s.html

    # Each keyword names one result; its value is the pattern to search for.
    info = s.info(
        book_name='<meta name="og:novel:book_name" content="(.*?)"/>',
        book_author='<meta name="og:novel:author" content="(.*?)"/>',
        book_info='<td class="L"><a href="(.*?).html">(.*?)</a></td>',
    )
    print(info)
                 
    





















    
